package org.wyz.spark;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

import java.util.Arrays;
import java.util.List;

public class WordCount {

    /** Default input file and output directory; {@code main}'s args are ignored. */
    private static final String[] ARGS = {"data/input/wordcount/a.txt", "data/output/wordcount"};

    /**
     * Reads the input text file, counts occurrences of each whitespace-separated
     * word with a local-mode Spark job, and writes {@code "word count"} lines to
     * the output directory.
     *
     * @param args unused; input/output paths are taken from {@link #ARGS}
     */
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local").setAppName("wyz-wc");
        // try-with-resources: JavaSparkContext is Closeable and close() stops the
        // context, so it is released even if a stage throws (the original leaked
        // the context on failure and called stop() only on the happy path).
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            JavaRDD<String> rdd = sc.textFile(ARGS[0]);

            // "\\s+" (not "\\s") collapses runs of whitespace; with "\\s" two
            // adjacent spaces produced an empty-string token that was counted
            // as a word. A leading-whitespace line still yields one empty
            // first token from split, hence the isEmpty filter.
            rdd.flatMap(line -> Arrays.asList(line.split("\\s+")).iterator())
                    .filter(word -> !word.isEmpty())
                    .mapToPair(word -> new Tuple2<>(word, 1))
                    .reduceByKey(Integer::sum)
                    .map(pair -> pair._1 + " " + pair._2)
                    .saveAsTextFile(ARGS[1]);
        }
    }
}
